################################################################################
#                                                                              #
#        Replication script for analysis contained in:                         #
#        BRIEF OF AMICUS CURIAE FAIR TRIAL ANALYSIS, LLC                       #
#        Filed April 25, 2025 in ANDREW V. TINSLEY                                #
#        U.S. Court of Appeals for the 10th Circuit, Case No. 15-6190          #
#        W.D. Okla. D. Ct. No. 08-CV-00832-R                                   #
#                                                                              #
################################################################################



################## Set-up machine and load datasets ############################

# To begin, install the following packages: sate, RCPA3
# download Andrew_study_dataset.csv and Andrew_respondents_dataset.csv to working directory

rm(list=ls())  # starting with clean environment
library(sate)  # make sure you're using version 2.3.0
library(RCPA3)
# set working directory to location of downloaded datasets on your machine
setwd("C:/Users/Barry Edwards/Documents/andrew v white case") # edit for your machine

#### Load datasets
surveydata <- read.csv("Andrew_study_dataset.csv")
respondentdata <- read.csv("Andrew_respondents_dataset.csv")


########################## Set-up for analysis #################################
# names(surveydata) 

# Not analyzing preview/testing data or responses that fail fraud checks
surveydata <- subset(surveydata, Status!="Survey Preview")
surveydata <- subset(surveydata, Q_RelevantIDFraudScore < 30)
# https://www.qualtrics.com/support/survey-platform/survey-module/survey-checker/fraud-detection/#RelevantID
# 1525/1589 passed fraud check

surveydata$initial_assignment[surveydata$random_assignment==1] <- "Actual"
surveydata$initial_assignment[surveydata$random_assignment==2] <- "Hypothetical"

# Code respondent guilt votes in actual and hypothetical conditions
# Q46 is guilt vote in initial condition
# Q45 is vote in cross-over condition
surveydata$vote_in_actual <- surveydata$vote_in_hypo <- NA
surveydata$vote_in_actual[surveydata$initial_assignment=="Actual"] <- surveydata$Q46[surveydata$initial_assignment=="Actual"]
surveydata$vote_in_actual[surveydata$initial_assignment=="Hypothetical"] <- surveydata$Q45[surveydata$initial_assignment=="Hypothetical"]
surveydata$vote_in_hypo[surveydata$initial_assignment=="Actual"] <- surveydata$Q45[surveydata$initial_assignment=="Actual"]
surveydata$vote_in_hypo[surveydata$initial_assignment=="Hypothetical"] <- surveydata$Q46[surveydata$initial_assignment=="Hypothetical"]

# Code respondent sentencing votes in actual and hypothetical conditions
# Q26 is sentencing vote in initial condition
# Q31 is sentencing vote in cross-over condition
surveydata$Q26[surveydata$Q26=="Death penalty. I think the aggravating factors outweigh the mitigating factors."] <- "Death penalty"
surveydata$Q26[surveydata$Q26=="Life imprisonment. I think the mitigating factors outweigh the aggravating factors."] <- "Life imprisonment"
surveydata$Q31[surveydata$Q31=="Death penalty. I think the aggravating factors outweigh the mitigating factors."] <- "Death penalty"
surveydata$Q31[surveydata$Q31=="Life imprisonment. I think the mitigating factors outweigh the aggravating factors."] <- "Life imprisonment"
surveydata$sentence_in_actual <- surveydata$sentence_in_hypo <- NA
surveydata$sentence_in_actual[surveydata$initial_assignment=="Actual"] <- surveydata$Q26[surveydata$initial_assignment=="Actual"]
surveydata$sentence_in_actual[surveydata$initial_assignment=="Hypothetical"] <- surveydata$Q31[surveydata$initial_assignment=="Hypothetical"]
surveydata$sentence_in_hypo[surveydata$initial_assignment=="Actual"] <- surveydata$Q31[surveydata$initial_assignment=="Actual"]
surveydata$sentence_in_hypo[surveydata$initial_assignment=="Hypothetical"] <- surveydata$Q26[surveydata$initial_assignment=="Hypothetical"]

# the merge variable must match, is case sensitive
surveydata$ParticipantId <- surveydata$participantId


############### Identify jury-qualified respondents

# qualifying the jurors
# Q10 asks about standard qualifications
# Q16 asks about excluded occupations
# Q18 asks about following death penalty instructions
# freqC(surveydata$Q18)
isQualified <- (surveydata$Q10=="Yes" & surveydata$Q16=="No" 
                & (surveydata$Q18!="It should never be used. I would never support putting someone to death regardless of what they did." &
                   surveydata$Q18!="Murderers should always be put to death regardless of how sorry they are now or how rough their life has been."))


################### calculate sampling weights for analysis
# encode Cloud Research respondent data in appropriate format
respondentdata <- encode.cloud.respondent.variables(dataset = respondentdata)

# define the demographics of the target population (i.e. nationally representative)
target.demog <- target.population.demographics("OK")
# not using OK level data, using Oklahoma Co. data instead
target.demog$ba_or_more <- .320 
target.demog$woman <- .517
target.demog$hispanic <- .204
target.demog$black <- .138
target.demog$hhincome_over50k <- .621 
target.demog$age35plus <- .676 

respondentdata <- weights_for_population(respondentdata = respondentdata, targetdata = target.demog)
####  process the respondentdata for weighting before merging it with survey data
surveydata <- merge(surveydata, respondentdata, by = c("ParticipantId"), all.x=TRUE)
# there may be some survey responses that don't match to a respondent in merged dataset
# to be safe, code those survey responses with mean weight  
surveydata$weights[is.na(surveydata$weights)] <- mean(surveydata$weights, na.rm=T)
# adjusting for losing observations from jury qualifications, make sure mean = 1
surveydata$weights.qual <- surveydata$weights * (1 / mean(surveydata$weights[isQualified]))
# weights.qual is the variable used for weighting observations 


##################### Representativeness of Sample #############################

## check representative of raw, weighted, and qualified & weighted samples
## results reported as Table 1

demog.variables <- c("ba_or_more", "woman", "hispanic", "black", "hhincome_over50k", "age35plus")
# Butts County, Georgia is reference (target.demog)
# weighted sample
fun <- function(x, w) wtd.mean(x, w=surveydata$weights)
apply(X = surveydata[, demog.variables], FUN = fun, MARGIN = 2)
# unweighted sample
fun <- function(x) wtd.mean(x)
apply(X = surveydata[, demog.variables], FUN = fun, MARGIN = 2)
# qualified & weighted sample
fun <- function(x, w) wtd.mean(x, w=surveydata$weights[isQualified])
apply(X = surveydata[isQualified, demog.variables], FUN = fun, MARGIN = 2)
# sum(isQualified)


############## Guilt Phase: Analyze and Compare Verdict Preferences ############

voted_on_verdict <- (surveydata$vote_in_actual != "")

# find P(g|actual) and P(g|hypo)
actual_guilt_table <- freqC(surveydata$vote_in_actual[isQualified & voted_on_verdict], 
      w=surveydata$weights.qual[isQualified & voted_on_verdict], plot=F, digits=2)
hypo_guilt_table <- freqC(surveydata$vote_in_hypo[isQualified & voted_on_verdict], 
      w=surveydata$weights.qual[isQualified & voted_on_verdict], plot=F, digits=2)

pg_actual <- actual_guilt_table["Guilty","Percent"]/100
pg_hypo   <- hypo_guilt_table["Guilty","Percent"]/100
n_actual <- as.numeric(actual_guilt_table["Total","Frequency"])
n_hypo   <- as.numeric(hypo_guilt_table["Total","Frequency"])

# Compare JUROR stats on guilt vote
compare.juror.stats(pg_actual = pg_actual, n_actual = n_actual, pg_hypo = pg_hypo, n_hypo = n_hypo )
# Compare JURY stats on guilt vote
guilt_verdict_stats <- compare.jury.stats(pg_actual = pg_actual, n_actual = n_actual, pg_hypo = pg_hypo, n_hypo = n_hypo ,
                   pstrikes=9, dstrikes=9); guilt_verdict_stats
# Per Okla Stat 22-655, both sides 9 strikes in first degree murder trial


######### Sentencing Phase: Analyze and Compare Sentencing Preferences #########

# find P(d|actual) and P(d|hypo)
actual_sentence_table <- freqC(surveydata$sentence_in_actual[isQualified], 
                               w=surveydata$weights.qual[isQualified], plot=F)
hypo_sentence_table <- freqC(surveydata$sentence_in_hypo[isQualified], 
                             w=surveydata$weights.qual[isQualified], plot=F)

pd_actual <- actual_sentence_table["Death penalty","Percent"]/100
pd_hypo   <- hypo_sentence_table["Death penalty","Percent"]/100
n_actual <- as.numeric(actual_sentence_table["Total","Frequency"])
n_hypo   <- as.numeric(hypo_sentence_table["Total","Frequency"])

# Compare juror stats on sentencing vote, reported in Table 2
compare.juror.stats(pg_actual = pd_actual, n_actual = n_actual, pg_hypo = pd_hypo, n_hypo = n_hypo)

# Controlled effect table, Table 3
crosstabC(dv=surveydata$sentence_in_hypo[isQualified], iv=surveydata$sentence_in_actual[isQualified], 
          w=surveydata$weights.qual[isQualified], ivlabs = c("Death | Actual", "Life | Actual"), 
          dvlabs = c("Death | Hypo", "Life | Hypo"), 
          digits=1, plot = F)

# Compare jury verdict probabilities
death_verdict_stats <- compare.jury.stats(pg_actual = pd_actual, n_actual = n_actual, pg_hypo = pd_hypo, n_hypo = n_hypo, 
                   jury_n = 12, pstrikes=9, dstrikes=9); death_verdict_stats



######################## Graphic Showing Change in Probabilities ###############

## Graphic illustrates change in verdict probabilities
## this is Figure 1 in brief
png(file="effect on sentencing probabilities.png", width=6.5, height=3.5, 
    units="in", type="cairo", pointsize=11, res=300, antialias="default")
graphics::par(mar=c(2.5,2.5,1.5,.5), mfrow=c(1,2), omi=base::c(0.1,0.1,0.1,0.1), family="serif")
basic.plot.grid(xlab="",ylab="", main="")
mtext(side=1, text = "Jurors' Support for Death Sentence", cex = .8, line = 1.5)
mtext(side=2, text = "Probability of Death Sentence", cex = .8, line = 1.5)
mtext(side=3, text = "(a) 2025 Survey Data", line = .5, font = 2)
lines(x = seq(0,1,.01), y = as.jury.point(sample_pg = seq(0,1,.01), jury_n = 12, pstrikes = 9, dstrikes = 9))
sate:::plot.ellipse(pg=pd_actual, n=n_actual, point.col="gray25", pstrikes=9, dstrikes=9)
sate:::plot.ellipse(pg=pd_hypo, n=n_hypo, point.col="white", pstrikes=9, dstrikes=9)
graphics::legend(x=.50, y=.2, legend=base::c("Actual Trial", "Hypothetical Trial", "Confidence Interval"),
                 col=base::c("gray25", "black", "gray25"),
                 pch=base::c(16, 1, -1), lty=c(-1, -1, 3), 
                 cex=.7, pt.cex=base::c(1.2, 1.2, -1),
                 box.col="#FFFFFF60", bg="#FFFFFF60")

constant_shift <- .20
basic.plot.grid(xlab="", ylab="", main="")
mtext(side=1, text = "Jurors' Support for Death Sentence", cex = .8, line = 1.5)
mtext(side=2, text = "Probability of Death Verdict", cex = .8, line = 1.5)
mtext(side=3, text = "(b) Twenty Years Ago", line = .5, font = 2)
lines(x = seq(0,1,.01), y = as.jury.point(sample_pg = seq(0,1,.01), jury_n = 12, pstrikes = 9, dstrikes = 9))
sate:::plot.ellipse(pg=pd_actual+constant_shift, n=n_actual, point.col="gray25", pstrikes=9, dstrikes=9)
sate:::plot.ellipse(pg=pd_hypo+constant_shift, n=n_hypo, point.col="white", pstrikes=9, dstrikes=9)
graphics::legend(x=.50, y=.2, legend=base::c("Actual Trial", "Hypothetical Trial", "Confidence Interval"),
                 col=base::c("gray25", "black", "gray25"),
                 pch=base::c(16, 1, -1), lty=c(-1, -1, 3),
                 cex=.7, pt.cex=base::c(1.2, 1.2, -1),
                 box.col="#FFFFFF60", bg="#FFFFFF60")
dev.off()



######################## Graphic Comparing Other Cases #########################

# guilt_verdict_stats and death_verdict_stats defined above
death_verdict_stats_then <- compare.jury.stats(pg_actual = pd_actual+constant_shift, n_actual = n_actual, 
                   pg_hypo = pd_hypo+constant_shift, n_hypo = n_hypo, 
                   jury_n = 12, pstrikes=9, dstrikes=9)

png(file="effect compared to reference cases.png", width=6.5, height=3.5, 
    units="in", type="cairo", pointsize=11, res=300, antialias="default")
graphics::par(mar=c(3,0,0,0), mfrow=c(1,1), omi=base::c(0.1,0.1,0.1,0.1), family="serif")
plot(x="", y="", xlim=c(-.3,.37), ylim=c(-3,8), axes=F, xlab="", ylab="")
abline(v=seq(0,.6, by=.012), col=paste("gray", seq(100,0,by=-2), sep=""), lwd=11)
axis(side = 1, at = c(-1,0.0,0.1,0.2,0.3,0.4,0.5), labels = c("",seq(0,.5,by=.1)), cex.axis=.8)
mtext(side = 1, text = "Estimated Probability of Different Outcome", line = 2.25, cex=.9)
# Reference cases
# Lee 
points(x=.040, y=8, pch=21, col="black", bg="black")
text(x=.040, y=8, label="Lee v. Smeal\n447 F. App'x. 357 (3d Cir. 2011)", pos=1, cex=.7)
# Fulminante
points(x=.028, y=6, pch=21, col="black", bg="white")
text(x=.028, y=6, label="AZ v. Fulminante\n499 U.S. 279 (1991)", pos=1, cex=.7)
#Porter
points(x=.304, y=7, pch=21, col="black", bg="white")
text(x=.304, y=7, label="Porter v. McCollum\n558 U.S. 130 (2009)", pos=1, cex=.7, col="black")
# Hopkins
points(x=.003, y=4, pch=21, col="black", bg="black")
text(x=.003, y=4, label="Hopkins v. Cockrell, 325 F.3d 579\n(5th Cir. 2003)", pos=1, cex=.7)
# Jennings
points(x=.286, y=5, pch=21, col="black", bg="white")
text(x=.286, y=5, label="SC v. Jennings\n716 SE.2d 91 (S.C. 2011)", pos=1, cex=.7, col="black")
# Washington
points(x=-.011, y=2, pch=21, col="black", bg="black")
text(x=-.011, y=2, label="Strickland v. Washington\n466 U.S. 668 (1984)", pos=1, cex=.7)
# Chapman
points(x=.156, y=3, pch=21, col="black", bg="white")
text(x=.156, y=3, label="Chapman v. CA\n386 U.S. 18 (1967)", pos=1, cex=.7)
abline(h=.25, col="black", lwd=1)
# Andrew case
points(x=guilt_verdict_stats$difference[1], y=-.5, pch=22, col="black", bg="black")
text(x=guilt_verdict_stats$difference[1], y=-.5, label="Andrew's Guilt Phase", pos=1, cex=.7)
points(x=death_verdict_stats$difference[1], y=-1.5, pch=22, col="black", bg="white")
text(x=death_verdict_stats$difference[1], y=-1.5, label="Andrew's Sentencing Phase", pos=1, cex=.7)
points(x=death_verdict_stats_then$difference[1], y=-2.5, pch=22, col="black", bg="white")
text(x=death_verdict_stats_then$difference[1], y=-2.5, label="Andrew's Sentencing in 2004", pos=1, cex=.7)
legend("topleft", pch=c(1,16), col=c("black", "black"), 
       legend=c("Defendant/Petitioner prevailed", "Prosecution/Respondent prevailed"), 
       box.col = "white", bty = "n", cex = .8)
dev.off()

##########################  Set-up for Text Analysis  ##########################

# Q30 and Q34 ask for comments

## output a .csv file of qualified respondent comments for automated text analysis
write.csv(x=data.frame(weight=c(surveydata$weights.qual[isQualified], surveydata$weights.qual[isQualified]), 
                       initial_assignment=c(surveydata$initial_assignment[isQualified], surveydata$initial_assignment[isQualified]),
                       sentence_actual=c(surveydata$sentence_in_actual[isQualified], surveydata$sentence_in_actual[isQualified]),
                       sentence_hypo=c(surveydata$sentence_in_hypo[isQualified], surveydata$sentence_in_hypo[isQualified]),
                       comment=c(surveydata$Q30[isQualified], surveydata$Q34[isQualified])),
          file = "andrew_survey_respondent_comments.csv")

                   